R语言绘图50例

大鹏统计

2022-08-24

条形图(barplot)-基因邻接点节点数目

数据导入

#设置路径
path <-paste(dir,"01barplotStat/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt", header=T, sep="\t",
              comment.char = "", check.names =FALSE)

数据处理

#计算每个基因的邻接点节点数目
tb=table(c(rt[,1],rt[,2])) 
#按照邻接点节点数目排序
tb=sort(tb,decreasing =TRUE) 

#数据框处理
outTab=as.data.frame(tb)
colnames(outTab)=c("Gene","Count")
#数据保存
write.table(outTab,
            file=paste(path,"statResult.xls",sep =""),
            sep="\t",quote=F,row.names=F)

开始绘图

#定义柱状图显示最大基因数目
showNum=35
if(nrow(tb)<showNum){
    showNum=nrow(outTab)
}
n=as.matrix(tb)[1:showNum,]
#绘制柱状图
pdf(file=paste(dir,"01barplotStat.pdf",sep=""),width=6,height=8)
par(mar=c(5,7,2,3),xpd=TRUE)  #定义边界位置
bar=barplot(n,horiz=TRUE,col="skyblue",names=FALSE,
            xlim=c(0,ceiling(max(n)/5)*5), #ceiling返回大于x的最小整数
            xlab="Number of adjacent nodes") #设置x轴名称
text(x=n*0.95,y=bar,n)  #添加每个基因的邻接点节点数目
text(x=-0.2,y=bar,label=names(n),  #添加基因名称
     xpd=TRUE,
     pos=2)   #1,2,3,4:below,left,above,right 
dev.off()
#图例绘在制图区外,必须设置参数xpd=TRUE
#否则命令正确也不会出图,因为默认xpd=FALSE

条形图(barplot)-基因显著性条形图

数据导入

#设置路径
path <-paste(dir,"02barplotPval/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt", header=T, sep="\t",
              comment.char = "", check.names =FALSE)

数据处理

#按FDR排序
labels=rt[order(rt$FDR,decreasing =TRUE),"Term"]
rt$Term = factor(rt$Term,levels=labels)

开始绘图

library(ggplot2)
p=ggplot(data=rt)+
  geom_bar(aes(x=Term, y=Count, fill=FDR),stat='identity')+
  coord_flip() +   #调整条形的方向
  scale_fill_gradient(low="red", high = "blue")+  #连续值颜色填充
  xlab("Term") + 
  ylab("Gene count") + 
  theme(axis.text.x=element_text(color="black",size=10),
        axis.text.y=element_text(color="black", size=10)) +
  scale_y_continuous(expand=c(0,0)) +   #y轴是连续-注意coord_flip调整了方向
  scale_x_discrete(expand=c(0,0))+      #x轴是分类-注意expand=c(0,0)设置原点
  theme_bw()
ggsave(file=paste(dir,"02barplotPval.pdf",sep=""),width=8,height=6)  

#mapping  数据映射,更改映射默认值时使用
#data     数据集,更改映射数据集时使用
#stat     统计变换,这个参数使用频率相对较高
#stat     默认值为bin的情况下,是对数据进行计数
#stat     改成identity此时的条形图的长短表示各分类对y值求和
#position 位置调整,用于调整图层中重叠的点
#width    用于调整条形的宽度

条形图(barplot)-堆积百分比条形图

数据导入

#设置路径
path <-paste(dir,"03barplotPerent/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt", header=T, sep="\t",
              comment.char = "", check.names =FALSE)

数据处理

#数据转置
rt=t(rt)  
col=rainbow(nrow(rt),s=0.7,v=0.7)

开始绘图

pdf(file=paste(dir,"03barplotPerent.pdf",sep=""),width=20,height=10)
par(las=1,mar=c(8,5,4,16),mgp=c(3,0.1,0),cex.axis=1.5)
#las=1:always horizontal
#mgp:The margin line
#mar:A numerical vector of the form c(bottom, left, top, right)
a1=barplot(rt,col=col,yaxt="n",ylab="Relative Percent",xaxt="n",cex.lab=1.8)
a2=axis(2,tick=F,labels=F)
axis(2,a2,paste0(a2*100,"%"))
axis(1,a1,labels=F)
par(srt=60,xpd=T);text(a1,-0.02,colnames(rt),adj=1,cex=0.6);par(srt=0)
ytick2 = cumsum(rt[,ncol(rt)])   #累加
ytick1 = c(0,ytick2[-length(ytick2)])
legend(par('usr')[2]*0.98,par('usr')[4],legend=rownames(rt),col=col,pch=15,bty="n",cex=1.3)
dev.off()

barplotGroup

数据导入

#设置路径
path <-paste(dir,"04barplotGroup/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt", header=T, sep="\t",
              comment.char = "", check.names =FALSE)

开始绘图

library(ggpubr)  
pdf(file=paste(dir,"04barplotGroup.pdf",sep=""),width=8,height=6)
ggbarplot(rt, x="Term", y="Count", fill = "ONTOLOGY", color = "white", 
          orientation = "horiz",   #横向显示
          palette = "aaas",    #配色方案
          legend = "right",    #图例位置
          sort.val = "asc",    #上升排序,区别于desc
          sort.by.groups=TRUE)+    #按组排序
          scale_y_continuous(expand=c(0, 0)) + scale_x_discrete(expand=c(0,0))
dev.off()

boxplotSort

数据导入

#设置路径
path <-paste(dir,"05boxplotSort/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt", header=T, sep="\t",
              comment.char = "", check.names =FALSE)

数据处理

x=colnames(rt)[2]
y=colnames(rt)[3]
colnames(rt)=c("id","Type","expression")

开始绘图

#定义输出图片的排序方式
library(plyr)
library(ggpubr)
med=ddply(rt,"Type",summarise,med=median(expression))
rt$Type=factor(rt$Type, levels=med[order(med[,"med"],decreasing = T),"Type"])

#绘制
col=rainbow(length(levels(factor(rt$Type))))
p=ggboxplot(rt, x="Type", y="expression", color = "Type",
       palette = col,
       ylab=y,
       xlab=x,
       #add = "jitter",   #绘制每个样品的散点
       legend = "right")
pdf(file=paste(dir,"05boxplotSort.pdf",sep=""), width=10, height=6) 
p+rotate_x_text(60) #倾斜角度
dev.off()

boxplotDiff

数据导入

#设置路径
path <-paste(dir,"06boxplotDiff/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt", header=T, sep="\t",
              comment.char = "", check.names =FALSE)

数据处理

x=colnames(rt)[2]
y=colnames(rt)[3]
colnames(rt)=c("id","Type","Expression")

开始绘图

library(ggpubr)
#设置比较租
group=levels(factor(rt$Type))
rt$Type=factor(rt$Type, levels=group)
comp=combn(group,2)
my_comparisons=list()
for(i in 1:ncol(comp)){my_comparisons[[i]]<-comp[,i]}

#绘制boxplot
boxplot=ggboxplot(rt, x="Type", y="Expression", color="Type",
                  xlab=x,
                  ylab=y,
                  legend.title=x,
                  palette = c("blue","red"),
                  add = "jitter")+ 
        stat_compare_means(comparisons = my_comparisons)

#输出图片
pdf(file=paste(dir,"06boxplotDiff.pdf",sep=""),width=5,height=4.5)
print(boxplot)
dev.off()

boxplotMulti

数据导入

#设置路径
path <-paste(dir,"07boxplotMulti/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt", header=T, sep="\t",
              comment.char = "", check.names =FALSE)

数据处理

x=colnames(rt)[1]
colnames(rt)[1]="Type"
data=melt(rt,id.vars=c("Type"))
colnames(data)=c("Type","Gene","Expression")
library(reshape2)
library(ggpubr)
#绘制boxplot
p=ggboxplot(data, x="Gene", y="Expression", color = "Type", 
         ylab="Gene expression",
         xlab="",
         legend.title=x,
         palette = c("blue","red"),
         width=0.6, add = "none")
p=p+rotate_x_text(60)
p1=p+stat_compare_means(aes(group=Type),
          method="wilcox.test",
          symnum.args=list(cutpoints = c(0, 0.001, 0.01, 0.05, 1), symbols = c("***", "**", "*", " ")),
          label = "p.signif")

#输出图片
pdf(file=paste(dir,"07boxplotMulti.pdf",sep=""),width=8,height=6)
print(p1)
dev.off()

boxplotClinical

数据导入

#设置路径
path <-paste(dir,"08boxplotClinical/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt",sep="\t",header=T,check.names=F)

数据处理

x=colnames(rt)[2]
y=colnames(rt)[3]
colnames(rt)=c("id","Type","Expression")

开始绘图

library(ggpubr) 
#设置比较租
group=levels(factor(rt$Type))
rt$Type=factor(rt$Type, levels=group)
comp=combn(group,2)
my_comparisons=list()
for(i in 1:ncol(comp)){my_comparisons[[i]]<-comp[,i]}

#绘制boxplot
boxplot=ggboxplot(rt, x="Type", y="Expression", color="Type",
                  xlab=x,
                  ylab=y,
                  legend.title=x,
                  add = "jitter")+ 
        stat_compare_means(comparisons = my_comparisons)
#输出图片
pdf(file=paste(dir,"08boxplotClinical.pdf",sep=""), width=5.5, height=5)
print(boxplot)
dev.off()

boxplotFacet

数据导入

#设置路径
path <-paste(dir,"09boxplotFacet/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt", header=T,sep="\t",check.names=F,row.names=1)

数据处理

x=colnames(rt)[1]
colnames(rt)[1]="Type"

#差异分析
geneSig=c("")
for(gene in colnames(rt)[2:ncol(rt)]){
    rt1=rt[,c(gene,"Type")]
    colnames(rt1)=c("expression","Type")
    p=1
    if(length(levels(factor(rt1$Type)))>2){
        test=kruskal.test(expression ~ Type, data = rt1)
        p=test$p.value
    }else{
        test=wilcox.test(expression ~ Type, data = rt1)
        p=test$p.value
    }
    Sig=ifelse(p<0.001,"***",ifelse(p<0.01,"**",ifelse(p<0.05,"*","")))
    geneSig=c(geneSig,Sig)
}
colnames(rt)=paste0(colnames(rt),geneSig)

#把数据转换成ggplot2输入文件
library(reshape2)
data=melt(rt,id.vars=c("Type"))
colnames(data)=c("Type","Gene","Expression")

开始绘制

#绘制
library(ggplot2)
p1=ggplot(data,aes(x=Type,y=Expression,fill=Type))+
    guides(fill=guide_legend(title=x))+
    labs(x = x, y = "Gene expression")+
    geom_boxplot()+ facet_wrap(~Gene,nrow =1)+ theme_bw()+
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

#输出
pdf(file=paste(dir,"09boxplotFacet.pdf",sep=""), width=9, height=5)
print(p1)
dev.off()

vioplot

数据导入

#设置路径
path <-paste(dir,"10vioplot/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt",header=T,sep="\t",check.names=F)

数据处理

x=colnames(rt)[2]
y=colnames(rt)[3]
colnames(rt)=c("id","Type","Expression")

开始绘图

#设置比较组
group=levels(factor(rt$Type))
rt$Type=factor(rt$Type, levels=group)
comp=combn(group,2)
my_comparisons=list()
for(i in 1:ncol(comp)){my_comparisons[[i]]<-comp[,i]}


#绘制
library(ggpubr)           
pdf(file=paste(dir,"10vioplot-1.pdf",sep=""), width=6, height=5)
ggviolin(rt, x="Type", y="Expression", fill = "Type", 
         xlab=x, ylab=y,
         legend.title=x,
         add = "boxplot", add.params = list(fill="white"))+ 
         stat_compare_means(comparisons = my_comparisons)
dev.off()

pdf(file=paste(dir,"10vioplot-2.pdf",sep=""), width=6, height=5)
ggviolin(rt, x="Type", y="Expression", fill = "Type", 
         xlab=x, ylab=y,
         legend.title=x,
         add = "boxplot", add.params = list(fill="white"))+ 
         stat_compare_means(comparisons = my_comparisons,
                            symnum.args=list(cutpoints = c(0,0.001,0.01,0.05,1),
                                             symbols = c("***","**","*","ns")),
                            label = "p.signif")
dev.off()

vioplotMulti

数据导入

#设置路径
path <-paste(dir,"11vioplotMulti/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt",header=T,sep="\t",check.names=F,row.names=1)

数据处理

x=colnames(rt)[1]
colnames(rt)[1]="Type"
#把数据转换成ggplot2数据文件
library(reshape2)
data=melt(rt,id.vars=c("Type"))
colnames(data)=c("Type","Gene","Expression")

开始绘图

#绘制小提琴图
library(ggpubr)
p=ggviolin(data, x="Gene", y="Expression", color = "Type", 
         ylab="Gene expression",
         xlab=x,
         legend.title=x,
         add.params = list(fill="white"),
         palette = c("blue","red"),
         width=1, add = "boxplot")
p=p+rotate_x_text(60)
p1=p+stat_compare_means(aes(group=Type),
          method="wilcox.test",
          symnum.args=list(cutpoints = c(0, 0.001, 0.01, 0.05, 1), 
                           symbols = c("***", "**", "*", " ")),
          label = "p.signif")

#输出
pdf(file=paste(dir,"11vioplotMulti.pdf",sep=""), width=6, height=5)
print(p1)
dev.off()

vioplotFacet

数据导入

#设置路径
path <-paste(dir,"12vioplotFacet/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt", header=T,sep="\t",check.names=F,row.names=1)

数据处理

x=colnames(rt)[1]
colnames(rt)[1]="Type"

#差异分析
geneSig=c("")
for(gene in colnames(rt)[2:ncol(rt)]){
    rt1=rt[,c(gene,"Type")]
    colnames(rt1)=c("expression","Type")
    p=1
    if(length(levels(factor(rt1$Type)))>2){
        test=kruskal.test(expression ~ Type, data = rt1)
        p=test$p.value
    }else{
        test=wilcox.test(expression ~ Type, data = rt1)
        p=test$p.value
    }
    Sig=ifelse(p<0.001,"***",ifelse(p<0.01,"**",ifelse(p<0.05,"*","")))
    geneSig=c(geneSig,Sig)
}
colnames(rt)=paste0(colnames(rt),geneSig)

#把数据转换成ggplot2文件
library(reshape2)
data=melt(rt,id.vars=c("Type"))
colnames(data)=c("Type","Gene","Expression")

开始绘图

#绘制
p1=ggplot(data,aes(x=Type,y=Expression,fill=Type))+
    guides(fill=guide_legend(title=x))+
    labs(x = x, y = "Gene expression")+
    geom_violin()+ geom_boxplot(width=0.2,position=position_dodge(0.9))+ 
    facet_wrap(~Gene,nrow =1)+ theme_bw()+
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

#出图
pdf(file=paste(dir,"12vioplotFacet.pdf",sep=""), width=9, height=5)
print(p1)
dev.off()

pairDiff

数据导入

#设置路径
path <-paste(dir,"13pairDiff/",sep="")
#设置工作目录
setwd(path)
#读取数据
data=read.table("input.txt",sep="\t",header=T,check.names=F,row.names=1)

数据处理

ylab="BRCA1 expression"      #y轴名称
cond1=colnames(data)[1]
cond2=colnames(data)[2]

开始绘图

#绘制
library("ggpubr")  
pdf(file=paste(dir,"13pairDiff-1.pdf",sep=""), width=5, height=4)
ggpaired(data, cond1 = cond1, cond2 = cond2, fill = "condition", 
         palette = "jco",
         legend.title="Condition",xlab="",ylab = ylab)+
  stat_compare_means(paired = TRUE, label = "p.format", label.x = 1.35)
dev.off()


pdf(file=paste(dir,"13pairDiff-2.pdf",sep=""), width=5, height=4)
ggpaired(data, cond1 = cond1, cond2 = cond2, fill = "condition", 
         palette = "jco",
         legend.title="Condition",xlab="",ylab = ylab)+
  stat_compare_means(paired = TRUE, 
                     symnum.args=list(cutpoints = c(0, 0.001, 0.01, 0.05, 1), 
                     symbols = c("***", "**", "*", "ns")),
                     label = "p.signif",label.x = 1.35)
dev.off()

ggballoonplot

数据导入

#设置路径
path <-paste(dir,"14ggballoonplot/",sep="")
#设置工作目录
setwd(path)
#读取数据
data=read.table("input.txt",header=T,sep="\t",check.names=F,row.names=1)

开始绘图

#绘制气泡图
library(ggpubr)   
pdf(file=paste(dir,"14ggballoonplot.pdf",sep=""),width=8,height=7)
ggballoonplot(data, fill = 'value')+gradient_fill(c('blue', 'white', 'red'))
dev.off()

15Deviation

数据导入

#设置路径
path <-paste(dir,"15Deviation/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt",sep="\t",header=T,check.names=F)

数据处理

x=colnames(rt)[1]
y=colnames(rt)[2]
colnames(rt)=c("Name","Value")
rt$Regulate=factor(ifelse(rt$Value<0, "Down", "Up"), levels = c("Up", "Down"))

开始绘图

library(ggpubr)  
pdf(file=paste(dir,"15Deviation.pdf",sep=""),width=6,height=5)
ggbarplot(rt, x="Name", y="Value", fill = "Regulate", color = "white", 
          palette = "jco", 
          sort.val = "desc", sort.by.groups = FALSE, 
          x.text.angle=90, 
          xlab = x, ylab = y,
          legend.title="Regulate", rotate=TRUE, ggtheme = theme_minimal())   
#rotatex设置x/y对调
dev.off()

heatmap

数据导入

#设置路径
path <-paste(dir,"16heatmap/",sep="")
#设置工作目录
setwd(path)
#读取数据
#读取文件
rt=read.table("input.txt" ,header=T,sep="\t",row.names=1,check.names=F)  
#读取样本属性文件
ann=read.table("group.txt",header=T,sep="\t",row.names=1,check.names=F)    

开始绘制

library(pheatmap)  
pdf(file=paste(dir,"16heatmap.pdf",sep=""),width=6,height=5.5)
pheatmap(rt,
         annotation=ann,
         cluster_cols = T,
         color = colorRampPalette(c("blue", "white", "red"))(50),
         show_colnames = T,
         scale="row",  #矫正
         #border_color ="NA",
         fontsize = 8,
         fontsize_row=6,
         fontsize_col=6)
dev.off()

cliHeatmap

数据导入

#设置路径
path <-paste(dir,"17cliHeatmap/",sep="")
#设置工作目录
setwd(path)
#读取数据
#读取表达文件
rt=read.table("input.txt" , sep="\t", header=T, row.names=1, check.names=F)
#读取临床文件
Type=read.table("clinical.txt", sep="\t", header=T, row.names=1, check.names=F)      

数据处理

var="Risk"     #按照临床性状(此处为风险)对样品排序
#样品取交集
sameSample=intersect(colnames(rt),row.names(Type))
rt=rt[,sameSample]
Type=Type[sameSample,]
Type=Type[order(Type[,var]),]   #按临床性状排序
rt=rt[,row.names(Type)]

开始绘图

#绘制热图
pdf(file=paste(dir,"17cliHeatmap.pdf",sep=""),height=5,width=8)
pheatmap(rt, annotation=Type, 
         color = colorRampPalette(c("blue", "white", "red"))(50),
         cluster_cols =F,    #是否聚类
         scale="row",   #基因矫正
         show_colnames=F,
         fontsize=7.5,
         fontsize_row=7,
         fontsize_col=5)
dev.off()

vol

数据导入

#设置路径
path <-paste(dir,"18vol/",sep="")
#设置工作目录
setwd(path)
#读取数据
#读取文件
rt=read.table("input.txt" ,sep="\t",header=T,check.names=F)

数据处理

logFCfilter=1     #logFC过滤阈值
fdrFilter=0.05    #矫正后p值阈值
#定义显著性
Significant=ifelse((rt$fdr<fdrFilter & abs(rt$logFC)>logFCfilter), 
                   ifelse(rt$logFC>logFCfilter,"Up","Down"), "Not")

开始绘图

#绘制火山图
library(ggplot2)  
p = ggplot(rt, aes(logFC, -log10(fdr)))+
    geom_point(aes(col=Significant))+
    scale_color_manual(values=c("green", "black", "red"))+
    labs(title = " ")+
    theme(plot.title = element_text(size = 16, hjust = 0.5, face = "bold"))
p=p+theme_bw()

#出图
pdf(file=paste(dir,"18vol.pdf",sep=""),width=5.5,height=4.5)
print(p)
dev.off()

venn

数据导入

#设置路径
path <-paste(dir,"19venn/",sep="")
#设置工作目录
setwd(path)
#读取数据
outFile="intersectGenes.txt"       #输出交集基因文件
files=dir()                        #获取目录下所有文件
files=grep("txt$",files,value=T)   #提取TXT结尾的文件
geneList=list()
#读取所有txt文件中的基因信息,保存到GENELIST
for(i in 1:length(files)){
    inputFile=files[i]
    if(inputFile==outFile){next}
    rt=read.table(inputFile,header=F)        #读取
    geneNames=as.vector(rt[,1])              #提取基因名
    geneNames=gsub("^ | $","",geneNames)     #去掉基因首尾的空格
    uniqGene=unique(geneNames)               #基因取unique
    header=unlist(strsplit(inputFile,"\\.|\\-"))
    geneList[[header[1]]]=uniqGene
    uniqLength=length(uniqGene)
    print(paste(header[1],uniqLength,sep=" "))
}

开始绘图

#绘制venn
library(VennDiagram)  
venn.plot=venn.diagram(geneList,filename=NULL,fill=rainbow(length(geneList)) )
pdf(file=paste(dir,"19venn.pdf",sep=""),width=5, height=5)
grid.draw(venn.plot)
dev.off()

#保存交集基因
intersectGenes=Reduce(intersect,geneList)
write.table(file=paste(dir,"intersectGenes.txt",sep=""),
            intersectGenes,sep="\t",quote=F,col.names=F,row.names=F)

UpSetR

数据导入

#设置路径
path <-paste(dir,"20UpSetR/",sep="")
#设置工作目录
setwd(path)
#读取数据
library(UpSetR)
files=dir()                          #获取目录下所有文件
files=grep("txt$",files,value=T)     #提取.txt结尾的文件
geneList=list()

#获取所有txt文件中的基因信息,保存到geneList
for(i in 1:length(files)){
    inputFile=files[i]
    if(inputFile==outFile){next}
    rt=read.table(inputFile,header=F)         #读取输入文件
    geneNames=as.vector(rt[,1])               #提取基因名称
    geneNames=gsub("^ | $","",geneNames)      #去掉基因首尾的空格
    uniqGene=unique(geneNames)                #基因取unique,唯一基因列表
    header=unlist(strsplit(inputFile,"\\.|\\-"))
    geneList[[header[1]]]=uniqGene
    uniqLength=length(uniqGene)
    print(paste(header[1],uniqLength,sep=" "))
}

开始绘图

#绘制UpSet
upsetData=fromList(geneList)
pdf(file=paste(dir,"20UpSetR.pdf",sep=""),onefile = FALSE,width=9,height=6)
upset(upsetData,
      nsets = length(geneList),               #展示多少个数据
      nintersects = 50,                       #展示基因集数目
      order.by = "freq",                      #按照数目排序
      show.numbers = "yes",                   #柱状图上方是否显示数值
      number.angles = 20,                     #字体角度
      point.size = 2,                         #点的大小
      matrix.color="red",                     #交集点颜色
      line.size = 0.8,                        #线条粗细
      mainbar.y.label = "Gene Intersections", 
      sets.x.label = "Set Size")
dev.off()
#保存交集文件
intersectGenes=Reduce(intersect,geneList)
write.table(file=paste(dir,"intersectGenes.txt",sep=""),
            intersectGenes,sep="\t",quote=F,col.names=F,row.names=F)

cor

数据导入

#设置路径
path <-paste(dir,"21cor/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt" ,sep="\t",header=T,check.names=F,row.names=1)

数据处理

gene1="MTMR14"             #第一个基因名字
gene2="PRKCD"              #第二个基因名字
#读取输入文件,提取基因表达量
x=as.numeric(rt[gene1,])
y=as.numeric(rt[gene2,])

开始绘图

library(ggplot2)
library(ggpubr)
library(ggExtra)
#相关性分析
df1=as.data.frame(cbind(x,y))
corT=cor.test(x,y,method="spearman")
cor=corT$estimate
pValue=corT$p.value
p1=ggplot(df1, aes(x, y)) + 
            xlab(gene1)+ylab(gene2)+
            geom_point()+ geom_smooth(method="lm",formula = y ~ x) + theme_bw()+
            stat_cor(method = 'spearman', aes(x =x, y =y))
p2=ggMarginal(p1, type = "density", xparams = list(fill = "orange"),yparams = list(fill = "blue"))

#出图1
pdf(file=paste(dir,"21cor-1.pdf",sep=""),width=5,height=4.8)
print(p1)
dev.off()
#出图2
pdf(file=paste(dir,"21cor-2.pdf",sep=""),width=5,height=5)
print(p2)
dev.off()

corrplot

数据导入

#设置路径
path <-paste(dir,"22corrplot/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt",sep="\t",header=T,row.names=1) 

数据处理

rt=t(rt)      #数据转置
M=cor(rt)     #相关型矩阵

开始绘图

#绘制相关性图形
library(corrplot)  
pdf(file=paste(dir,"22corrplot-1.pdf",sep=""),width=7,height=7)
corrplot(M,
         method = "circle",
         order = "hclust", #聚类
         type = "upper",
         col=colorRampPalette(c("green", "white", "red"))(50)
         )
dev.off()
#第二个图
pdf(file=paste(dir,"22corrplot-2.pdf",sep=""),width=8,height=8)
corrplot(M,
         order="original",
         method = "color",
         number.cex = 0.7, #相关系数
         addCoef.col = "black",
         diag = TRUE,
         tl.col="black",
         col=colorRampPalette(c("blue", "white", "red"))(50))
dev.off()

corCircos

数据导入

options(stringsAsFactors=F)
#设置路径
path <-paste(dir,"23corCircos/",sep="")
#设置工作目录
setwd(path)
#读取数据
#读取输入文件
rt=read.table("input.txt",sep="\t",header=T,check.names=F,row.names=1)

数据处理

rt=t(rt)  #转置
#计算基因间相关系数
cor1=cor(rt)
#设置图形颜色
col = c(rgb(1,0,0,seq(1,0,length=32)),rgb(0,1,0,seq(0,1,length=32)))
cor1[cor1==1]=0  #删掉相关性=1
c1 = ifelse(c(cor1)>=0,rgb(1,0,0,abs(cor1)),rgb(0,1,0,abs(cor1)))
col1 = matrix(c1,nc=ncol(rt))

开始绘制

library(corrplot)
library(circlize)
#绘制圈图
pdf(file=paste(dir,"23corCircos.pdf",sep=""),width=7,height=7)
par(mar=c(2,2,2,4))
circos.par(gap.degree=c(3,rep(2, nrow(cor1)-1)), start.degree = 180)
chordDiagram(cor1, grid.col=rainbow(ncol(rt)),
             col=col1,transparency = 0.5,symmetric = T)
par(xpd=T)
colorlegend(col, vertical = T,labels=c(1,0,-1),
            xlim=c(1.1,1.3),ylim=c(-0.4,0.4))      
dev.off()
circos.clear()

corNetwork

数据导入

#设置路径
path <-paste(dir,"24corNetwork/",sep="")
#设置工作目录
setwd(path)
#读取数据
data=read.table("input.txt",header=T,sep="\t",row.names=1,check.names=F)

数据处理

cordata=cor(t(data))
cutoff=0.4  #相关性阈值                  
#保留相关性矩阵的一半
mydata = cordata
upper = upper.tri(mydata)
mydata[upper] = NA
#把相关性矩阵转换为数据框
df = data.frame(gene=rownames(mydata),mydata)
library(reshape2)
dfmeltdata = melt(df,id="gene")
dfmeltdata = dfmeltdata[!is.na(dfmeltdata$value),]
dfmeltdata = dfmeltdata[dfmeltdata$gene!=dfmeltdata$variable,]
dfmeltdata = dfmeltdata[abs(dfmeltdata$value)>cutoff,]

进一步定义

library(igraph)
#定义网络图的节点和边
corweight = dfmeltdata$value
weight = corweight+abs(min(corweight))+5
d = data.frame(p1=dfmeltdata$gene,p2=dfmeltdata$variable,weight=dfmeltdata$value)
g = graph.data.frame(dfmeltdata,directed = FALSE)
#设置颜色,节点大小,字体大小
E(g)$color = ifelse(corweight>0,rgb(254/255,67/255,101/255,abs(corweight)),rgb(0/255,0/255,255/255,abs(corweight)))
V(g)$size = 8
V(g)$shape = "circle"
V(g)$lable.cex = 1.2
V(g)$color = "white"
E(g)$weight = weight

开始绘图

#可视化
pdf(file=paste(dir,"24corNetwork.pdf",sep=""),width=7,height=6)
layout(matrix(c(1,1,1,0,2,0),byrow=T,nc=3),height=c(6,1),width=c(3,4,3))
par(mar=c(1.5,2,2,2))
vertex.frame.color = NA
plot(g,layout=layout_nicely,
     vertex.label.cex=V(g)$lable.cex,
     edge.width = E(g)$weight,edge.arrow.size=0,
     vertex.label.color="black",
     vertex.frame.color=vertex.frame.color,
     edge.color=E(g)$color,
     vertex.label.cex=V(g)$lable.cex,vertex.label.font=2,
     vertex.size=V(g)$size,edge.curved=0.4)

#绘制图例
color_legend = c(rgb(254/255,67/255,101/255,seq(1,0,by=-0.01)),
                 rgb(0/255,0/255,255/255,seq(0,1,by=0.01)))
par(mar=c(2,2,1,2),xpd = T,cex.axis=1.6,las=1)
barplot(rep(1,length(color_legend)),border = NA, 
        space = 0,ylab="",xlab="",
        xlim=c(1,length(color_legend)),
        horiz=FALSE,
        axes = F, 
        col=color_legend,main="")
axis(3,at=seq(1,length(color_legend),length=5),
     c(1,0.5,0,-0.5,-1),
     tick=FALSE)
dev.off()

radar

数据导入

#设置路径
path <-paste(dir,"25radar/",sep="")
#设置工作目录
setwd(path)
#读取数据
data=read.table("input.txt" ,header=T,sep="\t",row.names=1,check.names=F)    

数据处理

col="red"                 #定义颜色
data=as.data.frame(t(data))       #转置
maxValue=ceiling(max(abs(data))*10)/10  #设置刻度
data=rbind(rep(maxValue,ncol(data)),rep(-maxValue,ncol(data)),data)

开始绘图

library(fmsb) 
pdf(file=paste(dir,"25radar.pdf",sep=""),height=7,width=7)
radarchart( data, axistype=1, 
            pcol=col,                    #线条颜色
            plwd=2 ,                     #线条粗细
            plty=1,                      #虚线,实线
            cglcol="grey",               #背景线条颜色
            cglty=1,                     #背景线条虚线,实线1
            caxislabels=seq(-maxValue,maxValue,maxValue/2),    #坐标刻度
            cglwd=1.2,                   #背景线条粗细
            axislabcol="blue",           #刻度颜色
            vlcex=0.8                    #字体大小
)
dev.off()

ggalluvial

数据导入

#设置路径
path <-paste(dir,"26ggalluvial/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt", header = T, sep="\t", check.names=F)

数据处理

library(ggalluvial)
corLodes=to_lodes_form(rt, axes = 1:ncol(rt), id = "Cohort")

开始绘图

library(dplyr)
library(ggplot2)
pdf(file=paste(dir,"26ggalluvial.pdf",sep=""), width=7,height=6)
mycol <- rep(c("#029149","#6E568C","#E0367A","#D8D155","#223D6C","#D20A13",
               "#431A3D","#91612D","#FFD121","#088247","#11AA4D","#58CDD9",
               "#7A142C","#5D90BA","#64495D","#7CC767"),15)
ggplot(corLodes, aes(x = x, 
                     stratum = stratum, 
                     alluvium = Cohort,
                     fill = stratum, 
                     label = stratum)) +
  scale_x_discrete(expand =c(0,0)) +
  #用aes.flow控制线调颜色,forward说明颜色和前面一致,backward说明与后面一致
  geom_flow(width = 2/10,aes.flow = "forward") + 
  geom_stratum(alpha = .9,width = 2/10) +
    scale_fill_manual(values = mycol) +
    #size = 2代表基因名字大小
  geom_text(stat = "stratum",
            size = 2,
             color="black") +
  xlab("") + 
  ylab("") + 
  theme_bw() + 
  theme(axis.line = element_blank(),
        axis.ticks = element_blank(),
          axis.text.y = element_blank()) + 
  theme(panel.grid =element_blank()) + 
    theme(panel.border = element_blank()) + 
    ggtitle("") + guides(fill = FALSE)                            
#dev.off()

pie

数据导入

#设置路径
path <-paste(dir,"27pie/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt",header=T,sep="\t",check.names=F)

数据处理

library("plotrix")         
x=rt[,2]
labels=as.character(rt[,1])
#求个部分百分率
piepercent=paste(round(100*x/sum(x), 2), "%")
labels=paste0(labels,"\n",piepercent)

开始绘图

pdf(file=paste(dir,"27pie.pdf",sep=""),width=7,height=6)
pie3D(x, labels = labels, height=0.1, labelcex=0.8,
      explode =0.1, theta=0.85)   #explode:离中心距离  theta:观看角度    
dev.off()

bubble

数据导入

#设置路径
path <-paste(dir,"28bubble/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt", header=T, sep="\t", check.names=F) 
#输入文件需要四列信息
#Term: GO或名称
#Ratio: 基因比例
#Count: 富集数目
#FDR: 矫正pֵ

数据处理

#按照Ratio对Term排序
labels=rt[order(rt$Ratio),"Term"]
rt$Term = factor(rt$Term,levels=labels)

开始绘制

library(ggplot2)  
p = ggplot(rt,aes(Ratio, Term)) + 
    geom_point(aes(size=Count, color=FDR))
p1 = p + 
     scale_colour_gradient(low="red",high="blue") + 
     labs(color="FDR",size="Count",x="Gene ratio",y="Term")+
     theme(axis.text.x=element_text(color="black", size=10),axis.text.y=element_text(color="black", size=10)) + 
     theme_bw()
ggsave(file=paste(dir,"28bubble.pdf",sep=""), width=7, height=5)      #保存

Lollipop

数据导入

#设置路径
path <-paste(dir,"29Lollipop/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt", header=T,sep="\t",check.names=F)
#输入文件需要有三列信息
#ONTOLOGY: GO分类(BP, CC, MF)
#Term: GO名称
#Count: 富集在每个GO上的数目

开始绘图

#绘制棒棒糖图
library(ggpubr)  
pdf(file=paste(dir,"29Lollipop.pdf",sep=""),width=7,height=6)
ggdotchart(rt, x="Term", y="Count", color = "ONTOLOGY",group = "ONTOLOGY", 
          palette = "aaas",     #配色方案
          legend = "right",     #图例位置
          sorting = "descending",   #上升排序,区别于desc
          add = "segments",    #增加线段
          rotate = TRUE,       #横向显示
          dot.size = 5,        #圆圈大小
          label = round(rt$Count),   #圆圈内数值
          font.label = list(color="white",size=9, vjust=0.5),   #圆圈内数值字体设置
          ggtheme = theme_pubr())
dev.off()

GOcircos

数据导入

#设置路径
path <-paste(dir,"30GOcircos/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt", sep="\t",header=T,check.names=F)      

加载R包

#install.packages("colorspace")
#install.packages("stringi")
#install.packages("ggplot2")
#install.packages("digest")
#install.packages("GOplot")

#if (!requireNamespace("BiocManager", quietly = TRUE))
#    install.packages("BiocManager")
#BiocManager::install("org.Hs.eg.db")  
#BiocManager::install("DOSE")
#BiocManager::install("clusterProfiler")
#BiocManager::install("enrichplot")
library(clusterProfiler)
library(org.Hs.eg.db)
library(enrichplot)
library(ggplot2)
library(GOplot)

数据处理-过滤

pFilter=0.05
adjPfilter=0.05
genes=as.vector(rt[,1])
entrezIDs=mget(genes, org.Hs.egSYMBOL2EG, ifnotfound=NA)    #找出基因对应的id
entrezIDs=as.character(entrezIDs)
rt=cbind(rt,entrezID=entrezIDs)
rt=rt[is.na(rt[,"entrezID"])==F,]                           #删掉没有基因ID的
gene=rt$entrezID

GO

#GO
GO=enrichGO(gene = gene,
            OrgDb = org.Hs.eg.db, 
            pvalueCutoff =1, 
            qvalueCutoff = 1,
            ont="all",
            readable =T)
GO=as.data.frame(GO)
GO=GO[(GO$pvalue<pFilter & GO$p.adjust<adjPfilter),]
write.table(GO,file=paste(path,"GO.txt",sep=""),sep="\t",quote=F,row.names = F)
go=data.frame(Category = GO$ONTOLOGY,ID = GO$ID,Term = GO$Description, 
              Genes = gsub("/", ", ", GO$geneID), adj_pval = GO$p.adjust)
#logFC
genelist=data.frame(ID = rt$gene, logFC = rt$logFC)
row.names(genelist)=genelist[,1]
circ <- circle_dat(go, genelist)
termNum = 5                                     #限定GO数目
termNum=ifelse(nrow(go)<termNum,nrow(go),termNum)
geneNum = nrow(genelist)                        #限定基因数目

开始绘图

chord <- chord_dat(circ, genelist[1:geneNum,], go$Term[1:termNum])
#圈图1
pdf(file=paste(dir,"30GOcircos-1.pdf",sep=""),width = 10,height = 10.2)
GOChord(chord, 
        space = 0.001,           #基因之间的间距
        gene.order = 'logFC',    #排序基因
        gene.space = 0.25,       #基因离圆圈距离
        gene.size = 3,           #
        border.size = 0.1,     
        process.label = 7)       #GO名称大小
dev.off()

#圈图2
pdf(file=paste(dir,"30GOcircos-2.pdf",sep=""),width = 12,height = 9)
GOCluster(circ, as.character(go[1:termNum,3]))
dev.off()

KEGGcircos

数据导入

#设置路径
path <-paste(dir,"31KEGGcircos/",sep="")
#设置工作目录
setwd(path)
#读取数据

rt=read.table("input.txt",sep="\t",header=T,check.names=F) #读取id.txt文件

加载R包

#install.packages("colorspace")
#install.packages("stringi")
#install.packages("ggplot2")
#install.packages("digest")
#install.packages("GOplot")

#if (!requireNamespace("BiocManager", quietly = TRUE))
#    install.packages("BiocManager")
#BiocManager::install("org.Hs.eg.db")
#BiocManager::install("DOSE")
#BiocManager::install("clusterProfiler")
#BiocManager::install("enrichplot")

library(clusterProfiler)
library(org.Hs.eg.db)
library(enrichplot)
library(ggplot2)
library(GOplot)

数据处理-过滤

pFilter=0.05
adjPfilter=0.05
genes=as.vector(rt[,1])                                     #提取基因列表
entrezIDs=mget(genes, org.Hs.egSYMBOL2EG, ifnotfound=NA)    #找出基因对应id
entrezIDs=as.character(entrezIDs)
rt=cbind(rt,entrezID=entrezIDs)
rt=rt[is.na(rt[,"entrezID"])==F,]                           #去除id为NA的基因
gene=rt$entrezID

KEGG富集分析

kk=enrichKEGG(gene = gene, organism = "hsa", pvalueCutoff =1, qvalueCutoff =1)
KEGG=as.data.frame(kk)
KEGG=KEGG[(KEGG$pvalue<pFilter & KEGG$p.adjust<adjPfilter),]
KEGG$geneID=as.character(sapply(KEGG$geneID,function(x)
  paste(rt$gene[match(strsplit(x,"/")[[1]],as.character(rt$entrezID))],
        collapse="/")))
write.table(KEGG,file=paste(path,"KEGG.txt",sep=""),
            sep="\t",quote=F,row.names = F)       #保存富集结果

获取KEGG信息

kegg=data.frame(Category = "ALL",ID = KEGG$ID,
                Term = KEGG$Description, 
                Genes = gsub("/", ", ", KEGG$geneID), 
                adj_pval = KEGG$p.adjust)
#读取基因的logFC
genelist <- data.frame(ID = rt$gene, logFC = rt$logFC)
row.names(genelist)=genelist[,1]

circ <- circle_dat(kegg, genelist)
termNum = 5                                              #限定Term数
termNum=ifelse(nrow(kegg)<termNum,nrow(kegg),termNum)
geneNum = nrow(genelist)                                 #限定基因数目

开始绘图

chord <- chord_dat(circ, genelist[1:geneNum,], kegg$Term[1:termNum])
pdf(file=paste(dir,"31KEGGcircos-1.pdf",sep=""),width = 11,height = 11.2)
GOChord(chord, 
        space = 0.001,           #基因之间的间距
        gene.order = 'logFC',    #按照logFC对基因排序
        gene.space = 0.25,       #基因名跟圆圈的相对距离
        gene.size = 4,           #基因名字体大小
        border.size = 0.1,       #线条粗细
        process.label = 7)       #term字体大小
dev.off()
#绘制聚类圈图
pdf(file=paste(dir,"31KEGGcircos-2.pdf",sep=""),width = 12,height = 9)
GOCluster(circ, as.character(kegg[1:termNum,3]))
dev.off()

multiGSEA

数据导入

#设置路径
path <-paste(dir,"32multiGSEA/",sep="")
#设置工作目录
setwd(path)
#读取数据
files=grep(".xls",dir(),value=T)                            #读取目录下的所有xls文件
data = lapply(files,read.delim)                             #读取每个文件
names(data) = files
library(plyr)
dataSet = ldply(data, data.frame)
dataSet$pathway = gsub(".xls","",dataSet$.id)               #将文件后缀删掉

开始绘图

gseaCol=c("#58CDD9","#7A142C","#5D90BA","#431A3D","#91612D","#6E568C",
          "#E0367A","#D8D155","#64495D","#7CC767","#223D6C","#D20A13",
          "#FFD121","#088247","#11AA4D")
library(ggplot2)
library(grid)
library(gridExtra)

#富集图
pGsea=ggplot(dataSet,aes(x=RANK.IN.GENE.LIST,y=RUNNING.ES,colour=pathway,group=pathway))+
  geom_line(size = 1.5) + 
  scale_color_manual(values = gseaCol[1:nrow(dataSet)]) +   
  labs(x = "", y = "Enrichment Score", title = "") + 
  scale_x_continuous(expand = c(0, 0)) + 
  scale_y_continuous(expand = c(0, 0),
                     limits = c(min(dataSet$RUNNING.ES - 0.02), 
                                max(dataSet$RUNNING.ES + 0.02))) +   
  theme_bw() + theme(panel.grid = element_blank()) + 
  theme(panel.border = element_blank()) + 
  theme(axis.line = element_line(colour = "black")) + 
  theme(axis.line.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.x = element_blank()) + 
  geom_hline(yintercept = 0) + 
  guides(colour = guide_legend(title = NULL)) + 
  theme(legend.background = element_blank()) + 
  theme(legend.key = element_blank())+theme(legend.key.size=unit(0.5,'cm'))

#基因排序图
pGene=ggplot(dataSet,aes(RANK.IN.GENE.LIST,pathway,colour=pathway))+
  geom_tile()+
  scale_color_manual(values = gseaCol[1:nrow(dataSet)]) + 
  labs(x = "High expression<-->Low expression", y = "", title = "") + 
  scale_x_discrete(expand = c(0, 0)) + 
  scale_y_discrete(expand = c(0, 0)) +  
  theme_bw() + 
  theme(panel.grid = element_blank()) + 
  theme(panel.border = element_blank()) + 
  theme(axis.line = element_line(colour = "black"))+
  theme(axis.line.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.text.y = element_blank())+ 
  guides(color=FALSE)

gGsea = ggplot_gtable(ggplot_build(pGsea))
gGene = ggplot_gtable(ggplot_build(pGene))
maxWidth = grid::unit.pmax(gGsea$widths, gGene$widths)
gGsea$widths = as.list(maxWidth)
gGene$widths = as.list(maxWidth)
dev.off()
pdf(file=paste(dir,"32multiGSEA.pdf",sep=""), width=10,height=6)
par(mar=c(5,5,2,5))
grid.arrange(arrangeGrob(gGsea,gGene,nrow=2,heights=c(.8,.3)))
dev.off()

survivalDiscrete

数据导入

#设置路径
path <-paste(dir,"33survivalDiscrete/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt",  header=T, sep="\t", check.names=F)

数据处理

var="Stage"         #用于生存分析的变量
rt=rt[,c("futime","fustat",var)]
colnames(rt)[3]="Type"
groupNum=length(levels(factor(rt[,"Type"])))

library(survival)
library(survminer)
#比较组间生存差异的P值
diff=survdiff(Surv(futime, fustat) ~Type,data = rt)
pValue=1-pchisq(diff$chisq,df=(groupNum-1))  #df自由度
if(pValue<0.001){
    pValue="p<0.001"
}else{
    pValue=paste0("p=",sprintf(".03f",pValue))
}
fit <- survfit(Surv(futime, fustat) ~ Type, data = rt)

开始绘制

surPlot=ggsurvplot(fit, 
                   data=rt,
                   conf.int=F,  #置信区间
                   pval=pValue,
                   pval.size=5,
                   legend.labs=levels(factor(rt[,"Type"])),
                   legend.title=var,
                   xlab="Time(years)",
                   break.time.by = 1,
                   risk.table.title="",
                   risk.table=F,
                   risk.table.height=.25)
pdf(file=paste(dir,"33survivalDiscrete.pdf",sep=""),onefile = FALSE,width = 5,height =4.5)
print(surPlot)
dev.off()

survivalContinuous

数据导入

#设置路径
path <-paste(dir,"34survivalContinuous/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt",header=T, sep="\t", check.names=F)

数据处理

var="MATN3"                   #用于生存分析的变量
rt=rt[,c("futime","fustat",var)]

library(survival)
#根据中位值,把样品分为两组
group=ifelse(rt[,3]>median(rt[,3]),"High","Low")
diff=survdiff(Surv(futime, fustat) ~group,data = rt)
pValue=1-pchisq(diff$chisq,df=1)
if(pValue<0.001){
    pValue="p<0.001"
}else{
    pValue=paste0("p=",sprintf("%.03f",pValue))
}
fit <- survfit(Surv(futime, fustat) ~ group, data = rt) #剩余数目

开始绘制

library(survminer)
#绘制
surPlot=ggsurvplot(fit, 
                   data=rt,
                   conf.int=TRUE,
                   pval=pValue,
                   pval.size=5,
                   legend.labs=c("High", "Low"),
                   legend.title=var,
                   xlab="Time(years)",
                   break.time.by = 1,
                   risk.table.title="",
                   palette=c("red", "blue"),
                   risk.table=T,
                   risk.table.height=.25)
pdf(file=paste(dir,"34survivalContinuous.pdf",sep=""),onefile = FALSE,width = 6,height =5)
print(surPlot)
dev.off()

survivalCutoff

数据导入

#设置路径
path <-paste(dir,"35survivalCutoff/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt", header=T, sep="\t", check.names=F)

数据处理

var="MATN3"                  
rt=rt[,c("futime","fustat",var)]
colnames(rt)=c("futime","fustat","var")

library(survival)
library(survminer)
#获取最优cutoff
res.cut=surv_cutpoint(rt, time = "futime", event = "fustat",variables =c("var"))
res.cut
res.cat=surv_categorize(res.cut)
fit=survfit(Surv(futime, fustat) ~var, data = res.cat)

#比较高低表达生存差异
diff=survdiff(Surv(futime, fustat) ~var,data =res.cat)
pValue=1-pchisq(diff$chisq,df=1)
if(pValue<0.001){
    pValue="p<0.001"
}else{
    pValue=paste0("p=",sprintf("%.03f",pValue))
}

开始绘制

surPlot=ggsurvplot(fit, 
                   data=res.cat,
                   conf.int=TRUE,
                   pval=pValue,
                   pval.size=5,
                   legend.labs=c("High", "Low"),
                   legend.title=var,
                   xlab="Time(years)",
                   break.time.by = 1,
                   risk.table.title="",
                   palette=c("red", "blue"),
                   risk.table=T,
                   risk.table.height=.25)
pdf(file=paste(dir,"35survivalCutoff.pdf",sep=""), onefile = FALSE,width = 6,height =5)
print(surPlot)
dev.off()

survival2vars

数据导入

#设置路径
path <-paste(dir,"36survival2vars/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt",header=T, sep="\t", check.names=F)

数据处理

library(survival)
library(survminer)
var1="VCAN"                   #用于生存分析的变量1
var2="CXCR4"                  #用于生存分析的变量2
#根据基因表达,对数据分组
a=ifelse(rt[,var1]<median(rt[,var1]),paste0(var1," low"),paste0(var1," high"))
b=ifelse(rt[,var2]<median(rt[,var2]),paste0(var2," low"),paste0(var2," high"))
Type=paste(a,"+",b)
rt=cbind(rt,Type)

#生存差异统计
length=length(levels(factor(Type)))
diff=survdiff(Surv(futime, fustat) ~Type,data = rt)
pValue=1-pchisq(diff$chisq,df=length-1)
if(pValue<0.001){
    pValue="p<0.001"
}else{
    pValue=paste0("p=",sprintf("%.03f",pValue))
}
fit <- survfit(Surv(futime, fustat) ~ Type, data = rt)

开始绘制

surPlot=ggsurvplot(fit, 
                   data=rt,
                   conf.int=F,  #不含置信区间
                   pval=pValue,
                   pval.size=5,
                   legend.labs=levels(factor(rt[,"Type"])),
                   legend.title="Type",
                   #font.legend=6,
                   legend = "right",
                   xlab="Time(years)",
                   break.time.by = 1,
                   risk.table.title="",
                   risk.table=F,     #风险表格
                   risk.table.height=.25)
pdf(file=paste(dir,"36survival2vars.pdf",sep=""),onefile = FALSE,
    width = 7.5,height =5)
print(surPlot)
dev.off()

forest

数据导入

#设置路径
path <-paste(dir,"37forest/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt",header=T,sep="\t",row.names=1,check.names=F)

数据处理

gene=rownames(rt)
hr=sprintf("%.3f",rt$"HR")
hrLow=sprintf("%.3f",rt$"HR.95L")
hrHigh=sprintf("%.3f",rt$"HR.95H")
Hazard.ratio=paste0(hr,"(",hrLow,"-",hrHigh,")")
pVal=ifelse(rt$pvalue<0.001, "<0.001", sprintf("%.3f", rt$pvalue))

开始绘图

#出图格式
pdf(file=paste(dir,"37forest.pdf",sep=""), width = 6, height =4.5)
n=nrow(rt)
nRow=n+1
ylim=c(1,nRow)
layout(matrix(c(1,2),nc=2),width=c(3,2))

#森林图左边的基因信息
xlim = c(0,3)
par(mar=c(4,2,1.5,1.5))
plot(1,xlim=xlim,ylim=ylim,type="n",axes=F,xlab="",ylab="")
text.cex=0.8
text(0,n:1,gene,adj=0,cex=text.cex)
text(1.5-0.5*0.2,n:1,pVal,adj=1,cex=text.cex);text(1.5-0.5*0.2,n+1,'pvalue',cex=text.cex,font=2,adj=1)
text(3,n:1,Hazard.ratio,adj=1,cex=text.cex);text(3,n+1,'Hazard ratio',cex=text.cex,font=2,adj=1,)

#绘制森林图
par(mar=c(4,1,1.5,1),mgp=c(2,0.5,0))
xlim = c(0,max(as.numeric(hrLow),as.numeric(hrHigh)))
plot(1,xlim=xlim,ylim=ylim,type="n",axes=F,ylab="",xaxs="i",xlab="Hazard ratio")
arrows(as.numeric(hrLow),n:1,as.numeric(hrHigh),n:1,angle=90,code=3,length=0.03,col="darkblue",lwd=2.5)
abline(v=1,col="black",lty=2,lwd=2)
boxcolor = ifelse(as.numeric(hr) > 1, 'red', 'blue')
points(as.numeric(hr), n:1, pch = 15, col = boxcolor, cex=1.3)
axis(1)
dev.off()

Nomo

数据导入

#设置路径
path <-paste(dir,"38Nomo/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt",header=T,sep="\t",check.names=F,row.names=1)

数据处理

library(rms) 
dd <- datadist(rt)
options(datadist="dd")
#生成函数
f <- cph(Surv(futime, fustat) ~Age+Gender+Grade+T+M+N+VCAN, x=T, y=T, surv=T, 
         data=rt, time.inc=1)
surv <- Survival(f)
#建立nomogram
nom <- nomogram(f, fun=list(function(x) surv(1, x), 
                            function(x) surv(2, x), 
                            function(x) surv(3, x)), 
    lp=F, funlabel=c("1-year survival", "2-year survival", "3-year survival"), 
    maxscale=100, 
    fun.at=c(0.99, 0.9, 0.8, 0.7, 0.5, 0.3,0.1,0.01))  

开始绘图

pdf(file=paste(dir,"38Nomo-1.pdf",sep=""), height=7,width=8.5)
plot(nom)
dev.off()

#calibration curve
time=3   #预测三年calibration
f <- cph(Surv(futime, fustat) ~ Age+Gender+Grade+T+M+N+VCAN, x=T, y=T, surv=T, 
         data=rt, time.inc=time)
cal <- calibrate(f, cmethod="KM", method="boot", 
                 u=time, m=100, B=1000) #m样品数目1/3
pdf(file=paste(dir,"38Nomo-2-cal.pdf",sep=""),height=6,width=7)
plot(cal,
     xlab="Nomogram-Predicted Probability of 3-Year OS",
     ylab="Actual 3-Year OS(proportion)",col="red",sub=F)
dev.off()

ROC

数据导入

#设置路径
path <-paste(dir,"39ROC/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt",header=T,sep="\t",check.names=F,row.names=1)

开始绘制

library(pROC)             
x="TTYH3"                  #用于ROC的变量
y=colnames(rt)[1]
rocobj1=roc(rt[,y], as.vector(rt[,x]))
pdf(file=paste(dir,"39ROC.pdf",sep=""),width=5,height=5)
plot(rocobj1, print.auc=TRUE, col="red")
dev.off()

multiVarROC

数据导入

#设置路径
path <-paste(dir,"40multiVarROC/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt",header=T,sep="\t",check.names=F,row.names=1)  

开始绘图

library(pROC)              
y=colnames(rt)[1]
#定义颜色
bioCol=c("red","blue","green","yellow")
if(ncol(rt)>4){
    bioCol=rainbow(ncol(rt))}

#绘制
pdf(file=paste(dir,"40multiVarROC.pdf",sep=""),width=5,height=5)
roc1=roc(rt[,y], as.vector(rt[,2]))
aucText=c( paste0(colnames(rt)[2],", AUC=",sprintf("%0.3f",auc(roc1))) )
plot(roc1, col=bioCol[1])
for(i in 3:ncol(rt)){
    roc1=roc(rt[,y], as.vector(rt[,i]))
    lines(roc1, col=bioCol[i-1])
    aucText=c(aucText, paste0(colnames(rt)[i],", AUC=",sprintf("%0.3f",auc(roc1))) )
}
legend("bottomright", aucText,lwd=2,bty="n",col=bioCol[1:(ncol(rt)-1)])
dev.off()

timeROC

数据导入

#设置路径
path <-paste(dir,"41timeROC/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt",  header=T, sep="\t", check.names=F)

开始绘制

library(survival)
library(survminer)
library(timeROC)
var="score"                #需要的变量
ROC_rt=timeROC(T=rt$futime, delta=rt$fustat,
               marker=rt[,var], cause=1,
               weighting='aalen',
               times=c(1), ROC=TRUE) #预测时间1年
pdf(file=paste(dir,"41timeROC.pdf",sep=""),width=5, height=5)
plot(ROC_rt, time=1, col='red', title=FALSE, lwd=2)
text(0.65,0.45,paste0('AUC = ',sprintf("%.03f",ROC_rt$AUC[2])),cex=1.2)
dev.off()

multiTimeROC

数据导入

#设置路径
path <-paste(dir,"42multiTimeROC/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt", header=T, sep="\t", check.names=F)

开始绘制

library(survival)
library(survminer)
library(timeROC)
var="score"               
#绘制
ROC_rt=timeROC(T=rt$futime, delta=rt$fustat,
               marker=rt[,var], cause=1,
               weighting='aalen',
               times=c(1,2,3), ROC=TRUE)
pdf(file=paste(dir,"42multiTimeROC.pdf",sep=""),width=5,height=5)
plot(ROC_rt,time=1,col='green',title=FALSE,lwd=2)
plot(ROC_rt,time=2,col='blue',add=TRUE,title=FALSE,lwd=2)
plot(ROC_rt,time=3,col='red',add=TRUE,title=FALSE,lwd=2)
legend('bottomright',
       c(paste0('AUC at 1 years: ',sprintf("%.03f",ROC_rt$AUC[1])),
         paste0('AUC at 2 years: ',sprintf("%.03f",ROC_rt$AUC[2])),
         paste0('AUC at 3 years: ',sprintf("%.03f",ROC_rt$AUC[3]))),
         col=c("green",'blue','red'),lwd=2,bty = 'n')
dev.off()

multiVarTimeROC

数据导入

#设置路径
path <-paste(dir,"43multiVarTimeROC/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt", header=T, sep="\t", check.names=F, row.names=1)

开始绘制

library(survival)
library(survminer)
library(timeROC)
#颜色
bioCol=rainbow(ncol(rt)-2)
#绘制
aucText=c()
pdf(file=paste(dir,"43multiVarTimeROC.pdf",sep=""),width=6,height=6)
i=3
ROC_rt=timeROC(T=rt$futime,delta=rt$fustat,marker=rt[,i],cause=1,weighting='aalen',times=c(1),ROC=TRUE)
plot(ROC_rt,time=1,col=bioCol[i-2],title=FALSE,lwd=2)
aucText=c(paste0(colnames(rt)[i],", AUC=",sprintf("%.3f",ROC_rt$AUC[2])))
abline(0,1)

for(i in 4:ncol(rt)){
    ROC_rt=timeROC(T=rt$futime,delta=rt$fustat,marker=rt[,i],cause=1,weighting='aalen',times=c(1),ROC=TRUE)
    plot(ROC_rt,time=1,col=bioCol[i-2],title=FALSE,lwd=2,add=TRUE)
    aucText=c(aucText,paste0(colnames(rt)[i],", AUC=",sprintf("%.3f",ROC_rt$AUC[2])))
}
legend("bottomright", aucText,lwd=2,bty="n",col=bioCol[1:(ncol(rt)-2)])
dev.off()

PCA

数据导入

#设置路径
path <-paste(dir,"44PCA/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt", header=T, sep="\t", check.names=F, row.names=1)

数据处理

data=rt[,c(2:ncol(rt))]
Type=rt[,1]
var=colnames(rt)[1]
#PCA分析
data.pca=prcomp(data, scale. = TRUE)
pcaPredict=predict(data.pca)
PCA = data.frame(PC1=pcaPredict[,1], PC2=pcaPredict[,2], Type=Type)
#定义颜色
col=c("red","blue")
if(length(levels(factor(Type)))>2){
    col=rainbow(length(levels(factor(Type))))}

开始绘图

library(ggplot2)         
#绘制PCA
pdf(file=paste(dir,"44PCA.pdf",sep=""),height=4.5, width=5.5)       #???????????ļ?
p=ggplot(data = PCA, aes(PC1, PC2)) + geom_point(aes(color = Type)) +
         scale_colour_manual(name=var,  values =col)+
         theme_bw()+
         theme(plot.margin=unit(rep(1.5,4),'lines'))+
         theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
print(p)
dev.off()

PCA3d

数据导入

#设置路径
path <-paste(dir,"45PCA3d/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt",header=T,sep="\t",check.names=F,row.names=1)

数据处理

data=rt[,c(2:ncol(rt))]
Type=rt[,1]  #提取分组信息
var=colnames(rt)[1]

#颜色
group=levels(factor(Type))
bioCol=c("red","blue","green","yellow")
col= bioCol[match(Type,group)]

#PCA分析
data.pca=prcomp(data, scale. = TRUE)
pcaPredict=predict(data.pca)

开始绘图

library(scatterplot3d)      
pdf(file=paste(dir,"45PCA3d.pdf",sep=""), height=5, width=6)
par(oma=c(0.5,0.5,0.5,0.5))
s3d=scatterplot3d(pcaPredict[,1:3], pch = 16, color=col)
legend("top", legend =group,pch = 16, inset = -0.2, box.col="white",xpd = TRUE, horiz = TRUE,col=bioCol[1:length(group)])
dev.off()

circos

数据导入

#设置路径
path <-paste(dir,"46circos/",sep="")
#设置工作目录
setwd(path)
#读取数据
bed1 = read.table("bed1.txt", header=T, sep="\t", check.names=F)  
bed2 = read.table("bed2.txt", header=T, sep="\t", check.names=F)

#install.packages(“circlize”)

开始绘制

library(circlize) 
pdf(file=paste(dir,"46circos.pdf",sep=""),width=7, height=7)
circos.par("track.height" = 0.2, cell.padding = c(0, 0, 0, 0))
circos.initializeWithIdeogram()
circos.genomicLink(bed1, bed2, col =rainbow(nrow(bed1)), border = NA)#连线
circos.clear()
dev.off()

genome

数据导入

#设置路径
path <-paste(dir,"47genome/",sep="")
#设置工作目录
setwd(path)
#读取数据
rt=read.table("input.txt",header=T, sep="\t", check.names=F)   

数据处理

#if (!requireNamespace("BiocManager", quietly = TRUE))
#    install.packages("BiocManager")
#BiocManager::install("karyoploteR")  
library(karyoploteR)       
#提取数据
data.points=makeGRangesFromDataFrame(rt)
mcols(data.points)=data.frame(y=rt[,4])

开始绘制

ymin=quantile(rt[,4],0.1)
ymax=quantile(rt[,4],0.9)*2
pdf(file=paste(dir,"47genome.pdf",sep=""),width=10, height=7)
kp=plotKaryotype("hg38", plot.type=1)
kpDataBackground(kp, data.panel=1, color="white")
kpPoints(kp, data=data.points, pch=".", col="red", ymin=ymin, ymax=ymax, cex=6)
kpAddBaseNumbers(kp, tick.dist=10000000, minor.tick.dist=1000000)
dev.off()

ggtree

加载R包

#install.packages("colorspace")
#install.packages("ggplot2")
#if (!requireNamespace("BiocManager", quietly = TRUE))
#    install.packages("BiocManager")
#BiocManager::install("ggtree")

library("ggplot2")
library("ggtree")
library("colorspace")

数据导入

#设置路径
path <-paste(dir,"48ggtree/",sep="")
#设置工作目录
setwd(path)
#读取数据
treFile="input.tre"           #进化树文件
groupFile="group.txt"         #树枝分类
outFile="tree.pdf"            #输出
#读取属性文件,把属性信息保存到list
cls=list()
rt=read.table(groupFile,sep="\t",header=T)
for(i in 1:nrow(rt)){
    otu=as.character(rt[i,1])
    phylum=as.character(rt[i,2])
    cls[[phylum]]=c(cls[[phylum]], otu)
}
phylumNames=names(cls)
phylumNum=length(phylumNames)

#读取进化树文件,和属性文件合并
tree=read.tree(treFile)
tree=groupOTU(tree, cls)

开始绘制

#绘制
pdf(file=paste(dir,"48ggtree.pdf",sep=""),width=7, height=7)
ggtree(tree, 
       layout="circular", 
       ladderize = F, 
       branch.length="none", 
       aes(color=group)) + 
       scale_color_manual(values=c(rainbow_hcl(phylumNum+1)),
                          breaks=phylumNames, 
                          labels=phylumNames ) + 
       theme(legend.position="right") + 
       geom_text(aes(label=paste("                ",label,sep=""), 
       angle=angle+45), 
       size=2)
dev.off()

waterfall

数据导入

#设置路径
path <-paste(dir,"49waterfall/",sep="")
#设置工作目录
setwd(path)
#读取数据
#if (!require("BiocManager"))
#    install.packages("BiocManager")
#BiocManager::install("maftools") 
library(maftools)            
maf=read.maf(maf = 'input.maf') 

开始绘制

pdf(file=paste(dir,"49waterfall.pdf",sep=""),width=7,height=6)
oncoplot(maf = maf, top = 30, fontSize = 0.8 ,showTumorSampleBarcodes = F )
dev.off()

gganatogram

数据导入

#设置路径
path <-paste(dir,"50gganatogram/",sep="")
#设置工作目录
setwd(path)
#source("https://neuroconductor.org/neurocLite.R")
#neuro_install("gganatogram")

#install.packages("devtools")
#library(devtools)
#devtools::install_github("jespermaag/gganatogram")

#读取数据
rt=read.table("male.txt" , header=T, sep="\t", check.names=F, stringsAsFactors=F)   

开始绘制

gender="male"                 
library(gganatogram)
library(dplyr)
library(viridis)
library(gridExtra)
pdf(file=paste(dir,"50gganatogram.pdf",sep=""), width=8,height=6)
gganatogram(data=rt, fillOutline='white', 
            organism='human', 
            sex=gender, fill="value")+ 
  theme_void() +
  scale_fill_gradient2(low = "green", mid="black", high = "red",midpoint = 3)
dev.off()

更多课程

敬请期待